# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import re
def getdata(url, a, b, c, num):
    """Scrape one page of registration records and append them to acm.txt.

    Parameters:
        url: page URL to fetch.
        a, b, c: starting indices of class / student ID / name within the
            flat list of ``<td>`` cells (the three fields repeat with
            stride 3 per student).
        num: 1-based running counter printed in each record header.

    NOTE(review): writes through the module-global file handle ``f``
    opened by the caller -- confirm the file is open before calling.
    """
    resp = requests.get(url, timeout=10)  # fail fast instead of hanging forever
    soup = BeautifulSoup(resp.text, "html.parser")
    team_names = soup.select('table.table a')  # team names
    # class / student ID / name live in consecutive plain <td> cells
    cells = re.findall('<td>(.*?)</td>', resp.text)
    sexs = soup.find_all('td', style=re.compile("width: 80px"))  # gender column
    statuses = soup.find_all('td', style=re.compile("width: 100px"))  # registration status column
    for team_name, sex, status in zip(team_names, sexs, statuses):
        # batch each record into a single write instead of eight tiny calls;
        # the useless per-iteration `three` dict from the original is gone
        f.write(''.join([
            '第' + str(num) + '位同学的注册信息\n',
            '队名:' + team_name.get_text() + '\n',
            '班级:' + cells[a] + '\n',
            '学号:' + cells[b] + '\n',
            '姓名:' + cells[c] + '\n',
            '性别:' + sex.get_text() + '\n',
            '注册状态:' + status.get_text() + '\n',
            '----------------\n',
        ]))
        a += 3
        b += 3
        c += 3
        num += 1
    

if __name__ == '__main__':
    page = 1
    total_page = 11
    # a/b/c: starting offsets of class / student ID / name in the <td> stream
    a = 0
    b = 1
    c = 2
    num = 1  # running student counter; presumably 20 records per page -- TODO confirm

    # utf-8 so the Chinese field labels are written consistently on every
    # platform; `with` guarantees the handle (read as a global by getdata)
    # is closed even if the crawl blows up
    with open('acm.txt', 'a', encoding='utf-8') as f:
        while page <= total_page:
            try:
                url = ("http://acm.scau.edu.cn:8000/uoj/register/"
                       "displayActivity.html?id=29&page=" + str(page))
                getdata(url, a, b, c, num)
                num += 20
            except Exception:  # narrowed from bare except; keep best-effort crawl
                print('爬取失败')
            finally:
                # advance unconditionally: the original only bumped `page`
                # on success, so one failing page looped forever
                page += 1
                print('第' + str(page - 1) + '页爬取完成')
